Fragestellung: “Die Mannschaft, die zur Halbzeit vorne liegt, gewinnt mit einer Chance von mindestens 75% auch das Spiel. Falls es zur Halbzeit unentschieden steht, gewinnt eher das Heimteam.”
Dafür nehmen wir den DataCamp-Datensatz „Soccer Data“.
Als Einführung werden wir auf Datacamp folgende Kurse durchgehen:
# Bibliotheken importieren
library("plotly")
library("plyr")
# Collect the CSV files in the Data folder. The pattern keeps anything that is
# not a CSV (e.g. a README or .DS_Store) from being handed to read.csv().
files <- list.files(
  path = "./Data/",
  pattern = "\\.csv$",
  full.names = TRUE,
  ignore.case = TRUE
)
# Fail early with a clear message instead of carrying an empty data frame
# through the rest of the analysis.
if (length(files) == 0) {
  stop("No CSV files found in ./Data/", call. = FALSE)
}
# Read every file and stack the rows into one data frame
# (the folder is expected to hold the 2015-2019 files).
df <- ldply(.data = files, .fun = read.csv)
# Open the data viewer only in an interactive session, so that a
# non-interactive run does not depend on a viewer being available.
if (interactive()) {
  View(df)
}
# Map the result codes used in the data to the labels shown on the x axis.
# Counting against these fixed levels keeps each label paired with its own
# count even when a code is absent from the data or extra values (blank, NA)
# are present; a positional pairing would silently mislabel the bars.
result_labels <- c(A = "Away", D = "Draw", H = "Home")

# Half-time and full-time result counts: one row per code, count in column n.
# Base table() is used so the step does not depend on which attached package
# currently provides count().
htr_table <- as.data.frame(
  table(HTR = factor(df$HTR, levels = names(result_labels))),
  responseName = "n"
)
ftr_table <- as.data.frame(
  table(FTR = factor(df$FTR, levels = names(result_labels))),
  responseName = "n"
)

Results <- unname(result_labels)
HT_count <- htr_table$n
FT_count <- ftr_table$n

# One row per result label with the half-time and full-time counts side by side.
df_results <- data.frame(Results, HT_count, FT_count)

# Grouped bar chart: half-time counts next to full-time counts per result.
fig <- plot_ly(
  df_results,
  x = ~Results, y = ~HT_count, type = "bar", name = "Half Time Score"
) %>%
  add_trace(y = ~FT_count, name = "Full Time Score") %>%
  layout(yaxis = list(title = "Count"), barmode = "group")

fig
NA
# Combine the half-time and full-time result codes into one column,
# e.g. "H H" for a home lead at half time followed by a home win.
df$result <- paste(df$HTR, df$FTR)

# Percentage of all games per half-time/full-time combination.
# prop.table(table()) is used instead of group_by()/summarise(n()) so the
# result does not depend on whether summarise() currently resolves to the
# plyr or the dplyr implementation.
result_share <- prop.table(table(df$result)) * 100
df_count_results <- data.frame(
  result = as.character(names(result_share)),
  count_result = as.vector(result_share)
)

# Bar chart of the combinations, ordered by descending share.
df_count_results %>%
  plot_ly(x = ~reorder(result, count_result), y = ~count_result) %>%
  add_bars() %>%
  layout(
    xaxis = list(categoryorder = "total descending", title = "Game Progress"),
    yaxis = list(title = "Probability"),
    title = "What is the probability of a game progress?"
  )
NA
# Row count of df2 expressed as a percentage of the row count of df1.
#
# df1: data frame whose number of rows is the denominator (the reference set).
# df2: data frame whose number of rows is the numerator; the callers in this
#      file pass a filtered subset of df1.
# Returns a single number, 100 / nrow(df1) * nrow(df2), or NA when df1 has no
# rows (the share is undefined for an empty reference set).
calc_prob <- function(df1, df2) {
  total <- nrow(df1)
  if (isTRUE(total == 0)) {
    return(NA_real_)
  }
  # The value is the last expression (not an assignment), so it is returned
  # visibly and prints when the function is called at the console.
  100 / total * nrow(df2)
}
# Split the games by the half-time result code: home lead, away lead, draw.
df_halftime_home <- filter(df, HTR == "H")
df_halftime_away <- filter(df, HTR == "A")
df_halftime_draw <- filter(df, HTR == "D")
# Within each half-time subset, keep the games whose full-time result code
# equals the half-time one (lead held, or draw stayed a draw).
df_fulltime_home_win <- filter(df_halftime_home, FTR == "H")
df_fulltime_away_win <- filter(df_halftime_away, FTR == "A")
df_fulltime_draw <- filter(df_halftime_draw, FTR == "D")
# Home team leads at half time and wins the game.
# calc_prob() gives the row count of its second argument as a percentage of
# the row count of its first argument.
home_win_prob <- calc_prob(df_halftime_home, df_fulltime_home_win)
print(home_win_prob)
[1] 82.54902
# Absolute number of games behind the percentage above.
print(nrow(df_fulltime_home_win))
[1] 421
# Away team leads at half time and wins the game.
away_win_prob <- calc_prob(df_halftime_away, df_fulltime_away_win)
print(away_win_prob)
[1] 72.03166
# Absolute number of games behind the percentage above.
print(nrow(df_fulltime_away_win))
[1] 273
# Draw at half time and still a draw at the end of the game.
# NOTE(review): the stated hypothesis also asks whether the home team tends to
# win after a half-time draw; that share (HTR == "D" with FTR == "H") is not
# computed in the code shown here.
draw_prob <- calc_prob(df_halftime_draw, df_fulltime_draw)
print(draw_prob)
[1] 36.45008
# Absolute number of games behind the percentage above.
print(nrow(df_fulltime_draw))
[1] 230
LS0tDQp0aXRsZTogIkRhdGF2aXogbWl0IFBsb3RseSBQTCBEYXRhIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KIyMgRnJhZ2VzdGVsbHVuZzogIkRpZSBNYW5zY2hhZmZ0LCBkaWUgenVyIEhhbGJ6ZWl0IHZvcm5lIGxpZWd0LCBnZXdpbm50IG1pdCBlaW5lciBDaGFuY2Ugdm9uIG1pbmRlc3RlbnMgNzUlIGF1Y2ggZGFzIFNwaWVsLiBGYWxscyB6dXIgSGFsYnplaXQgdW5lbnRzY2hpZWRlbiBpc3QsIGdld2lubnQgZWhlciBkYXMgSGVpbXRlYW0uIg0KDQoNCkRhZsO8ciBuZWhtZW4gd2lyIGRlbiBEYXRhY2FtcCBEYXRlbnNhdHogW1NvY2NlciBEYXRhXShodHRwczovL2FwcC5kYXRhY2FtcC5jb20vd29ya3NwYWNlL2RhdGFzZXRzL2RhdGFzZXQtcHl0aG9uLXNvY2NlcikNCg0KQWxzIEVpbmbDvGhydW5nIHdlcmRlbiB3aXIgYXVmIERhdGFjYW1wIGZvbGdlbmRlIEt1cnNlIGR1cmNoZ2VoZW46DQoNCi0gW0ludGVyYWN0aXZlIERhdGEgVmlzdWFsaXphdGlvbiB3aXRoIHBsb3RseV0oaHR0cHM6Ly9hcHAuZGF0YWNhbXAuY29tL2xlYXJuL2NvdXJzZXMvaW50ZXJhY3RpdmUtZGF0YS12aXN1YWxpemF0aW9uLXdpdGgtcGxvdGx5LWluLXIpDQoNCi0gW0ludGVybWVkaWF0ZSBJbnRlcmFjdGl2ZSBEYXRhIFZpc3VhbGl6YXRpb24gd2l0aCBwbG90bHldKGh0dHBzOi8vYXBwLmRhdGFjYW1wLmNvbS9sZWFybi9jb3Vyc2VzL2ludGVyYWN0aXZlLWRhdGEtdmlzdWFsaXphdGlvbi13aXRoLXBsb3RseS1pbi1yKQ0KDQpgYGB7cn0NCiMgQmlibGlvdGhla2VuIGltcG9ydGllcmVuDQpsaWJyYXJ5KCJwbG90bHkiKQ0KbGlicmFyeSgicGx5ciIpDQpgYGANCg0KYGBge3J9DQojIExpc3QgZmlsZXMgaW4gRGF0YSBmb2xkZXINCmZpbGVzIDwtIGxpc3QuZmlsZXMocGF0aD0iLi9EYXRhLyIsIHBhdHRlcm49TlVMTCwgYWxsLmZpbGVzPUZBTFNFLCBmdWxsLm5hbWVzPVRSVUUpDQoNCiMgQ3JlYXRlIERhdGFGcmFtZSB3aXRoIGFsbCBjc3YgZnJvbSAyMDE1LTIwMTkNCmRmIDwtIGxkcGx5KC5kYXRhID0gZmlsZXMsIC5mdW4gPSByZWFkLmNzdikNCg0KIyBWaWV3IGVudGlyZSBEYXRhRnJhbWUgaW4gUiBTdHVkaW8NClZpZXcoZGYpDQoNCmBgYA0KDQpgYGB7cn0NCmh0cl90YWJsZSA8LSBkZiAlPiUNCgljb3VudChIVFIpDQoNCmZ0cl90YWJsZSA8LSBkZiAlPiUNCgljb3VudChGVFIpDQoNClJlc3VsdHMgPSBjKCJBd2F5IiwgIkRyYXciLCAiSG9tZSIpDQpIVF9jb3VudCA8LSBjKGh0cl90YWJsZSRuKQ0KRlRfY291bnQgPC0gYyhmdHJfdGFibGUkbikNCg0KZGZfcmVzdWx0cyA8LSBkYXRhLmZyYW1lKFJlc3VsdHMsIEhUX2NvdW50LCBGVF9jb3VudCkNCg0KZmlnIDwtIHBsb3RfbHkoDQogIGRmX3Jlc3VsdHMsIHggPSB+UmVzdWx0cywgeSA9IH5IVF9jb3VudCwgdHlwZSA9ICdiYXInLCBuYW1lID0gJ0hhbGYgVGltZSBTY29yZScpICU+JSANCiAgYWRkX3RyYWNlKHkgPSB+RlRfY291bnQsIG5hbWUgPSAnRnVsbCBUaW1lIFNjb3JlJykgJT4l
DQogIGxheW91dCh5YXhpcyA9IGxpc3QodGl0bGUgPSAnQ291bnQnKSwgYmFybW9kZSA9ICdncm91cCcpDQoNCmZpZw0KDQpgYGANCg0KYGBge3J9DQojIEhUUiAmIEZUUiBpbiBlaW5lciBTcGFsdGUgenVzYW1tZW5mw7xnZW4NCg0KZGYkcmVzdWx0IDwtIHBhc3RlKGRmJEhUUiwgZGYkRlRSKQ0KDQpkZl9jb3VudF9yZXN1bHRzIDwtIGRmICU+JSANCiAgZ3JvdXBfYnkocmVzdWx0KSAlPiUgDQogIHN1bW1hcmlzZShjb3VudF9yZXN1bHQgPSBuKCkgLyBucm93KGRmKSAqIDEwMCkNCg0KZGZfY291bnRfcmVzdWx0cyAlPiUNCiAgcGxvdF9seSh4ID0gfnJlb3JkZXIocmVzdWx0LCBjb3VudF9yZXN1bHQpLCB5ID0gfmNvdW50X3Jlc3VsdCkgJT4lDQogIGFkZF9iYXJzKCkgJT4lDQogIGxheW91dCh4YXhpcyA9IGxpc3QoY2F0ZWdvcnlvcmRlciA9ICJ0b3RhbCBkZXNjZW5kaW5nIiwgdGl0bGUgPSAiR2FtZSBQcm9ncmVzcyIpLA0KICAgICAgICAgeWF4aXMgPSBsaXN0KHRpdGxlID0gIlByb2JhYmlsaXR5IiksDQogICAgICAgICB0aXRsZSA9ICJXaGF0IGlzIHRoZSBwcm9iYWJpbGl0eSBvZiBhIGdhbWUgcHJvZ3Jlc3M/IikNCg0KYGBgDQpgYGB7cn0NCmNhbGNfcHJvYiA8LSBmdW5jdGlvbihkZjEsIGRmMikgew0KICB3aW5fcHJvYiA8LSAxMDAgLyBucm93KGRmMSkgKiBucm93KGRmMikNCn0NCmBgYA0KDQpgYGB7cn0NCmRmX2hhbGZ0aW1lX2hvbWUgPC0gZGYgJT4lIGZpbHRlcihIVFIgPT0gIkgiKQ0KZGZfaGFsZnRpbWVfYXdheSA8LSBkZiAlPiUgZmlsdGVyKEhUUiA9PSAiQSIpDQpkZl9oYWxmdGltZV9kcmF3IDwtIGRmICU+JSBmaWx0ZXIoSFRSID09ICJEIikNCg0KZGZfZnVsbHRpbWVfaG9tZV93aW4gPC0gZGZfaGFsZnRpbWVfaG9tZSAlPiUgZmlsdGVyKEZUUiA9PSAiSCIpDQpkZl9mdWxsdGltZV9hd2F5X3dpbiA8LSBkZl9oYWxmdGltZV9hd2F5ICU+JSBmaWx0ZXIoRlRSID09ICJBIikNCmRmX2Z1bGx0aW1lX2RyYXcgPC0gZGZfaGFsZnRpbWVfZHJhdyAlPiUgZmlsdGVyKEZUUiA9PSAiRCIpDQoNCiMgSGVpbXRlYW0gZsO8aHJ0IHp1ciBIYWxiemVpdCB1bmQgZ2V3aW5udCBkYXMgU3BpZWwNCmhvbWVfd2luX3Byb2IgPC0gY2FsY19wcm9iKGRmX2hhbGZ0aW1lX2hvbWUsIGRmX2Z1bGx0aW1lX2hvbWVfd2luKQ0KcHJpbnQoaG9tZV93aW5fcHJvYikNCnByaW50KG5yb3coZGZfZnVsbHRpbWVfaG9tZV93aW4pKQ0KDQojIEF1c3fDpHJ0c3RlYW0gZsO8aHJ0IHp1ciBIYWxiemVpdCB1bmQgZ2V3aW5udCBkYXMgU3BpZWwNCmF3YXlfd2luX3Byb2IgPC0gY2FsY19wcm9iKGRmX2hhbGZ0aW1lX2F3YXksIGRmX2Z1bGx0aW1lX2F3YXlfd2luKQ0KcHJpbnQoYXdheV93aW5fcHJvYikNCnByaW50KG5yb3coZGZfZnVsbHRpbWVfYXdheV93aW4pKQ0KDQojIFVuZW50c2NoaWVkZW4genVyIEhhbGJ6ZWl0IHVuZCBhdWNoIGFtIEVuZGUgZGVzIFNwaWVscw0KZHJhd19wcm9iIDwtIGNhbGNfcHJvYihkZl9oYWxmdGltZV9kcmF3LCBkZl9m
dWxsdGltZV9kcmF3KQ0KcHJpbnQoZHJhd19wcm9iKQ0KcHJpbnQobnJvdyhkZl9mdWxsdGltZV9kcmF3KSkNCg0KYGBgDQpgYGB7cn0NCg0KYGBgDQoNCg==